#######################################################################
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import math
import numpy as np
# Load the per-country CO2 emission table (read with encoding="latin").
df = pd.read_csv("co2_emission_by_countries.csv", encoding="latin")
# Keep a working copy under its own name; `df` is reused as a scratch name below.
df1 = df.copy()
# Load the country -> continent lookup table.
df = pd.read_csv("continent_data.csv")
df2 = df.copy()
# Summary statistics of the numeric columns of the emission table.
df1.describe()
| Year | CO2 emission (Tons) | Population(2022) | Area | |
|---|---|---|---|---|
| count | 59620.000000 | 5.962000e+04 | 5.311600e+04 | 5.528400e+04 |
| mean | 1885.000000 | 1.034774e+09 | 3.992260e+07 | 6.522073e+05 |
| std | 78.231085 | 1.041652e+10 | 1.482365e+08 | 1.865483e+06 |
| min | 1750.000000 | 0.000000e+00 | 1.131200e+04 | 2.100000e+01 |
| 25% | 1817.000000 | 0.000000e+00 | 1.770414e+06 | 1.770450e+04 |
| 50% | 1885.000000 | 0.000000e+00 | 8.673095e+06 | 1.103815e+05 |
| 75% | 1953.000000 | 8.715092e+06 | 2.862920e+07 | 4.925730e+05 |
| max | 2020.000000 | 4.170000e+11 | 1.425887e+09 | 1.709824e+07 |
# Column names, non-null counts and dtypes of the emission table.
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 59620 entries, 0 to 59619 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 59620 non-null object 1 Code 57452 non-null object 2 Calling Code 56097 non-null object 3 Year 59620 non-null int64 4 CO2 emission (Tons) 59620 non-null float64 5 Population(2022) 53116 non-null float64 6 Area 55284 non-null float64 7 % of World 55284 non-null object 8 Density(km2) 53116 non-null object dtypes: float64(3), int64(1), object(5) memory usage: 4.1+ MB
# (number of rows, number of columns) of the emission table.
df1.shape
(59620, 9)
# Distinct values of "rows per country"; a single value means every country
# has the same number of rows.
df1['Country'].value_counts().unique()
array([271], dtype=int64)
# Remove the two code columns, which are not used by the rest of the analysis.
df1 = df1.drop(columns=['Calling Code','Code'])
df1
| Country | Year | CO2 emission (Tons) | Population(2022) | Area | % of World | Density(km2) | |
|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1750 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 1 | Afghanistan | 1751 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 2 | Afghanistan | 1752 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 3 | Afghanistan | 1753 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 4 | Afghanistan | 1754 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 59615 | Zimbabwe | 2016 | 736467042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59616 | Zimbabwe | 2017 | 746048675.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59617 | Zimbabwe | 2018 | 757903042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59618 | Zimbabwe | 2019 | 768852126.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59619 | Zimbabwe | 2020 | 779383468.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
59620 rows × 7 columns
# Shorter column names for the emission and population columns.
df1 = df1.rename(columns={'CO2 emission (Tons)': 'coTons', 'Population(2022)' : 'population'})
df1
| Country | Year | coTons | population | Area | % of World | Density(km2) | |
|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1750 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 1 | Afghanistan | 1751 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 2 | Afghanistan | 1752 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 3 | Afghanistan | 1753 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| 4 | Afghanistan | 1754 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 59615 | Zimbabwe | 2016 | 736467042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59616 | Zimbabwe | 2017 | 746048675.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59617 | Zimbabwe | 2018 | 757903042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59618 | Zimbabwe | 2019 | 768852126.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
| 59619 | Zimbabwe | 2020 | 779383468.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² |
59620 rows × 7 columns
# Order the whole table by population, largest first (rows with a missing
# population end up last).
df1 = df1.sort_values('population', ascending = False)
# Keep only the rows of year 2020: one row per country.
dfSelect2020 = df1[df1['Year'] == 2020]
dfSelect2020
| Country | Year | coTons | population | Area | % of World | Density(km2) | |
|---|---|---|---|---|---|---|---|
| 10839 | China | 2020 | 2.360000e+11 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| 24389 | India | 2020 | 5.441649e+10 | 1.417173e+09 | 3287590.0 | 2.00% | 431/km² |
| 57180 | United States | 2020 | 4.170000e+11 | 3.382899e+08 | 9372610.0 | 6.10% | 36/km² |
| 24660 | Indonesia | 2020 | 1.439996e+10 | 2.755013e+08 | 1904569.0 | 1.20% | 145/km² |
| 40649 | Pakistan | 2020 | 5.162289e+09 | 2.358249e+08 | 881912.0 | 0.50% | 267/km² |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 45798 | Saint Pierre and Miquelon | 2020 | 3.676592e+06 | NaN | 242.0 | 0.00% | NaN |
| 48508 | Sint Maarten (Dutch part) | 2020 | 6.770353e+07 | NaN | NaN | NaN | NaN |
| 53657 | Timor | 2020 | 6.543008e+06 | NaN | NaN | NaN | NaN |
| 55554 | Turks and Caicos Islands | 2020 | 3.930547e+06 | NaN | 948.0 | 0.00% | NaN |
| 58806 | Wallis and Futuna | 2020 | 5.072470e+05 | NaN | NaN | NaN | NaN |
220 rows × 7 columns
# 2020 rows that have a missing value in at least one column.
dfNull2020 = dfSelect2020[dfSelect2020.isnull().any(axis=1)]
# Names of the countries concerned.
dfNull2020['Country']
23576 Hong Kong 41191 Palestine 39836 North Macedonia 27912 Kosovo 43901 Reunion 9755 Cape Verde 7858 Brunei 1625 Anguilla 1896 Antarctica 2980 Aruba 6232 Bermuda 11110 Christmas Island 11923 Congo 12194 Cook Islands 12736 Cote d'Ivoire 13549 Curacao 14091 Czechia 14362 Democratic Republic of Congo 17885 Faeroe Islands 19240 French Polynesia 22492 Guinea-Bissau 31164 Macao 35771 Montserrat 37939 New Caledonia 39294 Niue 44985 Saint Helena 45798 Saint Pierre and Miquelon 48508 Sint Maarten (Dutch part) 53657 Timor 55554 Turks and Caicos Islands 58806 Wallis and Futuna Name: Country, dtype: object
# Number of 2020 rows with at least one missing value.
len(dfNull2020.index)
31
# Combined coTons of every 2020 row that has at least one missing field.
the31Union = dfNull2020['coTons'].sum()
# One-row frame holding the country with the largest 2020 coTons value.
theMostPoluating = dfSelect2020.nlargest(1, 'coTons')
# The second bar is labelled with the real size of the group; the label used
# to be the fixed text 'U34' although the group does not hold 34 countries.
x_value = [theMostPoluating['Country'].values[0], 'U' + str(len(dfNull2020.index))]
y_value = [theMostPoluating['coTons'].values[0], the31Union]
# Compare the single largest emitter with the whole incomplete-data group.
plt.bar(x_value, y_value)
plt.show()
# 2020 rows ordered from the smallest to the largest coTons value.
dfSorted2020 = dfSelect2020.sort_values('coTons', ascending=True)
dfSorted2020
| Country | Year | coTons | population | Area | % of World | Density(km2) | |
|---|---|---|---|---|---|---|---|
| 1896 | Antarctica | 2020 | 1.538880e+05 | NaN | NaN | NaN | NaN |
| 43359 | Puerto Rico | 2020 | 2.088480e+05 | 3.252407e+06 | 8870.0 | 0.00% | 367/km² |
| 55825 | Tuvalu | 2020 | 2.789250e+05 | 1.131200e+04 | 26.0 | 0.00% | 435/km² |
| 39294 | Niue | 2020 | 2.901480e+05 | NaN | 260.0 | 0.00% | NaN |
| 44985 | Saint Helena | 2020 | 3.792950e+05 | NaN | 394.0 | 0.00% | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 56909 | United Kingdom | 2020 | 7.816115e+10 | 6.750894e+07 | 242900.0 | 0.20% | 278/km² |
| 20324 | Germany | 2020 | 9.263562e+10 | 8.336984e+07 | 357114.0 | 0.20% | 233/km² |
| 44443 | Russia | 2020 | 1.150000e+11 | 1.447133e+08 | 17098242.0 | 11.00% | 8/km² |
| 10839 | China | 2020 | 2.360000e+11 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| 57180 | United States | 2020 | 4.170000e+11 | 3.382899e+08 | 9372610.0 | 6.10% | 36/km² |
220 rows × 7 columns
# Count how many of the smallest emitters (taken in ascending coTons order)
# are needed before their combined coTons reaches the value of the single
# largest emitter.
targetCo2 = theMostPoluating['coTons'].values[0]
i = 0
countriesSumsCo2 = 0
# Iterating over the column (instead of indexing with a free-running counter)
# guarantees the loop stops at the last row even if the target is never reached.
for value in dfSorted2020['coTons'].values:
    if not targetCo2 > countriesSumsCo2:
        break
    countriesSumsCo2 += value
    i += 1
# The accumulated countries are exactly the first `i` rows of the sorted frame,
# so take them in one slice rather than concatenating one row per iteration.
dfCountriesSums = dfSorted2020.iloc[:i].copy()
print(str(i) + " " + str(countriesSumsCo2))
207 418617496416.0
# Rows of the countries that were accumulated above.
dfCountriesSums
| Country | Year | coTons | population | Area | % of World | Density(km2) | |
|---|---|---|---|---|---|---|---|
| 1896 | Antarctica | 2020 | 1.538880e+05 | NaN | NaN | NaN | NaN |
| 43359 | Puerto Rico | 2020 | 2.088480e+05 | 3252407.0 | 8870.0 | 0.00% | 367/km² |
| 55825 | Tuvalu | 2020 | 2.789250e+05 | 11312.0 | 26.0 | 0.00% | 435/km² |
| 39294 | Niue | 2020 | 2.901480e+05 | NaN | 260.0 | 0.00% | NaN |
| 44985 | Saint Helena | 2020 | 3.792950e+05 | NaN | 394.0 | 0.00% | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7587 | Brazil | 2020 | 1.623851e+10 | 215313498.0 | 8515767.0 | 5.60% | 25/km² |
| 50134 | South Korea | 2020 | 1.833571e+10 | 51815810.0 | 100210.0 | 0.10% | 517/km² |
| 3251 | Australia | 2020 | 1.863575e+10 | 26177413.0 | 7692024.0 | 5.20% | 3/km² |
| 24931 | Iran | 2020 | 1.891444e+10 | 88550570.0 | 1648195.0 | 1.10% | 54/km² |
| 34416 | Mexico | 2020 | 2.007501e+10 | 127504125.0 | 1964375.0 | 1.30% | 65/km² |
207 rows × 7 columns
# The ten 2020 rows with the largest coTons value.
df2020Big = dfSelect2020.nlargest(10, 'coTons')
df2020Big
| Country | Year | coTons | population | Area | % of World | Density(km2) | |
|---|---|---|---|---|---|---|---|
| 57180 | United States | 2020 | 4.170000e+11 | 3.382899e+08 | 9372610.0 | 6.10% | 36/km² |
| 10839 | China | 2020 | 2.360000e+11 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| 44443 | Russia | 2020 | 1.150000e+11 | 1.447133e+08 | 17098242.0 | 11.00% | 8/km² |
| 20324 | Germany | 2020 | 9.263562e+10 | 8.336984e+07 | 357114.0 | 0.20% | 233/km² |
| 56909 | United Kingdom | 2020 | 7.816115e+10 | 6.750894e+07 | 242900.0 | 0.20% | 278/km² |
| 26557 | Japan | 2020 | 6.562517e+10 | 1.239517e+08 | 377930.0 | 0.20% | 328/km² |
| 24389 | India | 2020 | 5.441649e+10 | 1.417173e+09 | 3287590.0 | 2.00% | 431/km² |
| 18698 | France | 2020 | 3.872868e+10 | 6.462663e+07 | 551695.0 | 0.40% | 117/km² |
| 9484 | Canada | 2020 | 3.357674e+10 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² |
| 56367 | Ukraine | 2020 | 3.055916e+10 | 3.970174e+07 | 603500.0 | 0.40% | 66/km² |
# Every year of data for the ten countries selected in df2020Big.
allYearCO2Big = df1[df1['Country'].isin(df2020Big['Country'])]
allYearCO2Big
| Country | Year | coTons | population | Area | % of World | Density(km2) | |
|---|---|---|---|---|---|---|---|
| 10839 | China | 2020 | 2.360000e+11 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| 10754 | China | 1935 | 8.434728e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| 10740 | China | 1921 | 3.175662e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| 10741 | China | 1922 | 3.441998e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| 10742 | China | 1923 | 3.763514e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 9309 | Canada | 1845 | 2.235040e+05 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² |
| 9310 | Canada | 1846 | 2.491520e+05 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² |
| 9311 | Canada | 1847 | 2.821280e+05 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² |
| 9483 | Canada | 2019 | 3.304092e+10 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² |
| 9349 | Canada | 1885 | 7.003736e+07 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² |
2710 rows × 7 columns
# Wide table: one row per year, one column per selected country, coTons as values.
pivot_table = allYearCO2Big.pivot_table(values='coTons', index='Year', columns='Country')
# Full time span, one line per country.
pivot_table.plot()
plt.ylabel('CO2 TONS')
plt.title('CO2 Emissions over time')
plt.show()
# Same plot restricted to 1800-1900 (both bounds included).
dfCentury19 = allYearCO2Big[(allYearCO2Big['Year'] >= 1800) & (allYearCO2Big['Year'] <= 1900)]
pivot_table = dfCentury19.pivot_table(values='coTons', index='Year', columns='Country')
pivot_table.plot()
plt.ylabel('CO2 TONS')
plt.title('CO2 Emissions over time')
plt.show()
# Same plot restricted to 1900-2000 (both bounds included, so 1900 appears in
# this window and in the previous one).
dfCentury20 = allYearCO2Big[(allYearCO2Big['Year'] >= 1900) & (allYearCO2Big['Year'] <= 2000)]
pivot_table = dfCentury20.pivot_table(values='coTons', index='Year', columns='Country')
pivot_table.plot()
plt.ylabel('CO2 TONS')
plt.title('CO2 Emissions over time')
plt.show()
# Plotly version of the 1900-2000 plot: every pivot column becomes one line.
fig = px.line(pivot_table, x=pivot_table.index, y=pivot_table.columns[0:])
fig.show()
# Pivot for 2000-2020 (both bounds included); displayed as a table here.
dfCentury21 = allYearCO2Big[(allYearCO2Big['Year'] >= 2000) & (allYearCO2Big['Year'] <= 2020)]
pivot_table = dfCentury21.pivot_table(values='coTons', index='Year', columns='Country')
pivot_table
| Country | Canada | China | France | Germany | India | Japan | Russia | Ukraine | United Kingdom | United States |
|---|---|---|---|---|---|---|---|---|---|---|
| Year | ||||||||||
| 2000 | 2.218015e+10 | 7.488403e+10 | 3.142863e+10 | 7.621667e+10 | 1.893972e+10 | 4.100939e+10 | 8.332305e+10 | 2.494736e+10 | 6.847098e+10 | 3.040000e+11 |
| 2001 | 2.273920e+10 | 7.839897e+10 | 3.183916e+10 | 7.713332e+10 | 1.993228e+10 | 4.225916e+10 | 8.483076e+10 | 2.525092e+10 | 6.904729e+10 | 3.100000e+11 |
| 2002 | 2.330387e+10 | 8.227156e+10 | 3.224517e+10 | 7.803329e+10 | 2.095531e+10 | 4.353828e+10 | 8.632645e+10 | 2.554660e+10 | 6.960736e+10 | 3.160000e+11 |
| 2003 | 2.388621e+10 | 8.681658e+10 | 3.265712e+10 | 7.893444e+10 | 2.201493e+10 | 4.482575e+10 | 8.785206e+10 | 2.585361e+10 | 7.017883e+10 | 3.220000e+11 |
| 2004 | 2.446697e+10 | 9.204033e+10 | 3.307027e+10 | 7.982153e+10 | 2.314040e+10 | 4.610861e+10 | 8.938275e+10 | 2.616398e+10 | 7.075173e+10 | 3.280000e+11 |
| 2005 | 2.504282e+10 | 9.791689e+10 | 3.348578e+10 | 8.068823e+10 | 2.432635e+10 | 4.739898e+10 | 9.093035e+10 | 2.647710e+10 | 7.132169e+10 | 3.340000e+11 |
| 2006 | 2.561423e+10 | 1.040000e+11 | 3.389154e+10 | 8.156655e+10 | 2.558610e+10 | 4.866634e+10 | 9.253690e+10 | 2.680977e+10 | 7.188926e+10 | 3.400000e+11 |
| 2007 | 2.620889e+10 | 1.110000e+11 | 3.428697e+10 | 8.241817e+10 | 2.694425e+10 | 4.996948e+10 | 9.414145e+10 | 2.714613e+10 | 7.244850e+10 | 3.470000e+11 |
| 2008 | 2.678826e+10 | 1.190000e+11 | 3.467496e+10 | 8.327310e+10 | 2.840706e+10 | 5.120179e+10 | 9.577837e+10 | 2.747166e+10 | 7.299300e+10 | 3.530000e+11 |
| 2009 | 2.733487e+10 | 1.270000e+11 | 3.504359e+10 | 8.406339e+10 | 3.001988e+10 | 5.236500e+10 | 9.730733e+10 | 2.774896e+10 | 7.348689e+10 | 3.580000e+11 |
| 2010 | 2.789368e+10 | 1.350000e+11 | 3.542017e+10 | 8.489634e+10 | 3.169777e+10 | 5.357982e+10 | 9.892044e+10 | 2.804303e+10 | 7.399853e+10 | 3.640000e+11 |
| 2011 | 2.846355e+10 | 1.450000e+11 | 3.577391e+10 | 8.570556e+10 | 3.347790e+10 | 5.484469e+10 | 1.010000e+11 | 2.835099e+10 | 7.446798e+10 | 3.690000e+11 |
| 2012 | 2.903281e+10 | 1.550000e+11 | 3.612984e+10 | 8.651954e+10 | 3.544148e+10 | 5.615070e+10 | 1.020000e+11 | 2.865496e+10 | 7.495550e+10 | 3.750000e+11 |
| 2013 | 2.960651e+10 | 1.650000e+11 | 3.648737e+10 | 8.735100e+10 | 3.747842e+10 | 5.746604e+10 | 1.040000e+11 | 2.895221e+10 | 7.543288e+10 | 3.800000e+11 |
| 2014 | 3.017707e+10 | 1.750000e+11 | 3.681326e+10 | 8.814359e+10 | 3.966428e+10 | 5.872977e+10 | 1.060000e+11 | 2.920972e+10 | 7.587142e+10 | 3.860000e+11 |
| 2015 | 3.075013e+10 | 1.840000e+11 | 3.714317e+10 | 8.893920e+10 | 4.193284e+10 | 5.995316e+10 | 1.070000e+11 | 2.943354e+10 | 7.629376e+10 | 3.910000e+11 |
| 2016 | 3.130970e+10 | 1.940000e+11 | 3.747675e+10 | 8.973988e+10 | 4.431507e+10 | 6.115688e+10 | 1.090000e+11 | 2.966755e+10 | 7.669310e+10 | 3.960000e+11 |
| 2017 | 3.187906e+10 | 2.040000e+11 | 3.781374e+10 | 9.052577e+10 | 4.674892e+10 | 6.234500e+10 | 1.100000e+11 | 2.989064e+10 | 7.708067e+10 | 4.010000e+11 |
| 2018 | 3.245853e+10 | 2.140000e+11 | 3.813612e+10 | 9.127988e+10 | 4.934873e+10 | 6.348846e+10 | 1.120000e+11 | 3.012267e+10 | 7.746256e+10 | 4.070000e+11 |
| 2019 | 3.304092e+10 | 2.250000e+11 | 3.845205e+10 | 9.199130e+10 | 5.197470e+10 | 6.459439e+10 | 1.140000e+11 | 3.034525e+10 | 7.783157e+10 | 4.120000e+11 |
| 2020 | 3.357674e+10 | 2.360000e+11 | 3.872868e+10 | 9.263562e+10 | 5.441649e+10 | 6.562517e+10 | 1.150000e+11 | 3.055916e+10 | 7.816115e+10 | 4.170000e+11 |
# Default-size plot of the 2000-2020 pivot.
pivot_table.plot()
plt.ylabel('CO2 TONS')
plt.title('CO2 Emissions over time')
plt.show()
# Wider version of the same plot with rotated tick labels.
pivot_table.plot(figsize=(15, 5), rot=30)
# Put one tick on every year of the pivot and label it with the integer year.
plt.xticks(ticks=pivot_table.index, labels=pivot_table.index.astype(int))
plt.ylabel('CO2 TONS')
plt.title('CO2 Emissions over time')
plt.show()
# Work on an independent copy: a plain assignment would only create a second
# name for df2020Big, so the new column would be written into df2020Big too.
df2020BigByArea = df2020Big.copy()
# coTons divided by the Area column, for each of the ten selected countries.
df2020BigByArea['coByArea'] = df2020BigByArea['coTons'] / df2020BigByArea['Area']
df2020BigByArea
| Country | Year | coTons | population | Area | % of World | Density(km2) | coByArea | |
|---|---|---|---|---|---|---|---|---|
| 57180 | United States | 2020 | 4.170000e+11 | 3.382899e+08 | 9372610.0 | 6.10% | 36/km² | 44491.342326 |
| 10839 | China | 2020 | 2.360000e+11 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² | 24312.449592 |
| 44443 | Russia | 2020 | 1.150000e+11 | 1.447133e+08 | 17098242.0 | 11.00% | 8/km² | 6725.837662 |
| 20324 | Germany | 2020 | 9.263562e+10 | 8.336984e+07 | 357114.0 | 0.20% | 233/km² | 259400.681847 |
| 56909 | United Kingdom | 2020 | 7.816115e+10 | 6.750894e+07 | 242900.0 | 0.20% | 278/km² | 321783.226167 |
| 26557 | Japan | 2020 | 6.562517e+10 | 1.239517e+08 | 377930.0 | 0.20% | 328/km² | 173643.708930 |
| 24389 | India | 2020 | 5.441649e+10 | 1.417173e+09 | 3287590.0 | 2.00% | 431/km² | 16552.091045 |
| 18698 | France | 2020 | 3.872868e+10 | 6.462663e+07 | 551695.0 | 0.40% | 117/km² | 70199.441347 |
| 9484 | Canada | 2020 | 3.357674e+10 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² | 3362.829551 |
| 56367 | Ukraine | 2020 | 3.055916e+10 | 3.970174e+07 | 603500.0 | 0.40% | 66/km² | 50636.547377 |
# Bar chart of the coByArea column, one bar per country of df2020BigByArea.
df2020BigByArea.plot.bar(x='Country', y='coByArea', rot=30, figsize=(10,5))
plt.show()
# Same ratio for every year of the selected countries; the copy keeps
# allYearCO2Big itself unchanged.
allYearCoAreaBig = allYearCO2Big.copy()
allYearCoAreaBig['coByArea'] = (allYearCoAreaBig['coTons'] / allYearCoAreaBig['Area'])
allYearCoAreaBig
| Country | Year | coTons | population | Area | % of World | Density(km2) | coByArea | |
|---|---|---|---|---|---|---|---|---|
| 10839 | China | 2020 | 2.360000e+11 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² | 24312.449592 |
| 10754 | China | 1935 | 8.434728e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² | 86.893604 |
| 10740 | China | 1921 | 3.175662e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² | 32.715307 |
| 10741 | China | 1922 | 3.441998e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² | 35.459071 |
| 10742 | China | 1923 | 3.763514e+08 | 1.425887e+09 | 9706961.0 | 6.30% | 147/km² | 38.771292 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9309 | Canada | 1845 | 2.235040e+05 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² | 0.022385 |
| 9310 | Canada | 1846 | 2.491520e+05 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² | 0.024953 |
| 9311 | Canada | 1847 | 2.821280e+05 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² | 0.028256 |
| 9483 | Canada | 2019 | 3.304092e+10 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² | 3309.164984 |
| 9349 | Canada | 1885 | 7.003736e+07 | 3.845433e+07 | 9984670.0 | 6.10% | 4/km² | 7.014489 |
2710 rows × 8 columns
# Wide table: one row per year, one column per selected country, with the
# coByArea ratio (coTons divided by Area) as values.
pivot_table = allYearCoAreaBig.pivot_table(values='coByArea', index='Year', columns='Country')
pivot_table.plot()
# The plotted quantity is the per-area ratio, not raw tons, so the axis label
# and the title say so.
plt.ylabel('CO2 TONS / AREA')
plt.title('CO2 Emissions per unit of area over time')
plt.show()
# Plotly version of the same plot: every pivot column becomes one line.
fig = px.line(pivot_table, x=pivot_table.index, y=pivot_table.columns[0:])
fig.show()
# Rows must be in (Country, Year) order so that diff() compares each year
# with the previous year of the same country.
df1 = df1.sort_values(['Country','Year'], ascending=[True,True])
# Year-over-year change of coTons inside each country. The column is created
# directly by this assignment, so no placeholder column is needed beforehand.
df1['co2ForYear'] = df1.groupby('Country')['coTons'].diff()
# The first row of every country has no previous year (diff is NaN); use the
# row's own coTons value there.
df1['co2ForYear'] = df1['co2ForYear'].fillna(df1['coTons'])
df1
| Country | Year | coTons | population | Area | % of World | Density(km2) | co2ForYear | |
|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1750 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 |
| 1 | Afghanistan | 1751 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 |
| 2 | Afghanistan | 1752 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 |
| 3 | Afghanistan | 1753 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 |
| 4 | Afghanistan | 1754 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59615 | Zimbabwe | 2016 | 736467042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 10737567.0 |
| 59616 | Zimbabwe | 2017 | 746048675.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 9581633.0 |
| 59617 | Zimbabwe | 2018 | 757903042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 11854367.0 |
| 59618 | Zimbabwe | 2019 | 768852126.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 10949084.0 |
| 59619 | Zimbabwe | 2020 | 779383468.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 10531342.0 |
59620 rows × 8 columns
# All rows (now including co2ForYear) of the ten countries listed in df2020Big.
mostCoByYear = df1[df1['Country'].isin(df2020Big['Country'].values)]
mostCoByYear
| Country | Year | coTons | population | Area | % of World | Density(km2) | co2ForYear | |
|---|---|---|---|---|---|---|---|---|
| 9214 | Canada | 1750 | 0.000000e+00 | 38454327.0 | 9984670.0 | 6.10% | 4/km² | 0.000000e+00 |
| 9215 | Canada | 1751 | 0.000000e+00 | 38454327.0 | 9984670.0 | 6.10% | 4/km² | 0.000000e+00 |
| 9216 | Canada | 1752 | 0.000000e+00 | 38454327.0 | 9984670.0 | 6.10% | 4/km² | 0.000000e+00 |
| 9217 | Canada | 1753 | 0.000000e+00 | 38454327.0 | 9984670.0 | 6.10% | 4/km² | 0.000000e+00 |
| 9218 | Canada | 1754 | 0.000000e+00 | 38454327.0 | 9984670.0 | 6.10% | 4/km² | 0.000000e+00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 57176 | United States | 2016 | 3.960000e+11 | 338289857.0 | 9372610.0 | 6.10% | 36/km² | 5.000000e+09 |
| 57177 | United States | 2017 | 4.010000e+11 | 338289857.0 | 9372610.0 | 6.10% | 36/km² | 5.000000e+09 |
| 57178 | United States | 2018 | 4.070000e+11 | 338289857.0 | 9372610.0 | 6.10% | 36/km² | 6.000000e+09 |
| 57179 | United States | 2019 | 4.120000e+11 | 338289857.0 | 9372610.0 | 6.10% | 36/km² | 5.000000e+09 |
| 57180 | United States | 2020 | 4.170000e+11 | 338289857.0 | 9372610.0 | 6.10% | 36/km² | 5.000000e+09 |
2710 rows × 8 columns
# Wide table of the yearly change: one row per year, one column per country.
pivot_table = mostCoByYear.pivot_table(index = 'Year', values='co2ForYear',columns='Country' )
fig = px.line(pivot_table, x=pivot_table.index, y=pivot_table.columns[0:])
fig.show()
# Look at how names containing "united" (any case) are spelled in the
# continent table.
df2test = df2[df2['Entity'].str.contains('united', case=False)]
df2test
| Entity | Code | Year | Continent | |
|---|---|---|---|---|
| 263 | United Arab Emirates | ARE | 2015 | Asia |
| 264 | United Kingdom | GBR | 2015 | Europe |
| 265 | United States | USA | 2015 | North America |
| 266 | United States Minor Outlying Islands | UMI | 2015 | Oceania |
| 267 | United States Virgin Islands | VIR | 2015 | North America |
# Placeholder Continent column (all NaN); it is filled in further down by
# mapping each country name to its continent.
df1['Continent'] = math.nan
df1.head(10)
| Country | Year | coTons | population | Area | % of World | Density(km2) | co2ForYear | Continent | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1750 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 1 | Afghanistan | 1751 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 2 | Afghanistan | 1752 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 3 | Afghanistan | 1753 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 4 | Afghanistan | 1754 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 5 | Afghanistan | 1755 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 6 | Afghanistan | 1756 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 7 | Afghanistan | 1757 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 8 | Afghanistan | 1758 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 9 | Afghanistan | 1759 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
# Rows whose country name also appears in the continent table.
df1[df1['Country'].isin(df2['Entity'])]
| Country | Year | coTons | population | Area | % of World | Density(km2) | co2ForYear | Continent | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1750 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 1 | Afghanistan | 1751 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 2 | Afghanistan | 1752 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 3 | Afghanistan | 1753 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| 4 | Afghanistan | 1754 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59615 | Zimbabwe | 2016 | 736467042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 10737567.0 | NaN |
| 59616 | Zimbabwe | 2017 | 746048675.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 9581633.0 | NaN |
| 59617 | Zimbabwe | 2018 | 757903042.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 11854367.0 | NaN |
| 59618 | Zimbabwe | 2019 | 768852126.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 10949084.0 | NaN |
| 59619 | Zimbabwe | 2020 | 779383468.0 | 16320537.0 | 390757.0 | 0.30% | 42/km² | 10531342.0 | NaN |
58807 rows × 9 columns
# Country names that have no entry in the continent table.
df1[~df1['Country'].isin(df2['Entity'])]['Country'].unique()
array(['Faeroe Islands', 'Micronesia', 'Timor'], dtype=object)
# Hand-written continent entries for the three names missing from the lookup;
# Code and Year are left as NaN.
concat = pd.DataFrame({'Entity':['Faeroe Islands','Micronesia','Timor'], 'Code': math.nan, 'Year': math.nan, 'Continent':['Europe', 'Oceania', 'Asia']} )
# Append them to the lookup table; the appended rows keep their own index
# labels (0, 1, 2) until the index is reset later.
df2 = pd.concat([df2, concat])
df2
| Entity | Code | Year | Continent | |
|---|---|---|---|---|
| 0 | Abkhazia | OWID_ABK | 2015.0 | Asia |
| 1 | Afghanistan | AFG | 2015.0 | Asia |
| 2 | Akrotiri and Dhekelia | OWID_AKD | 2015.0 | Asia |
| 3 | Aland Islands | ALA | 2015.0 | Europe |
| 4 | Albania | ALB | 2015.0 | Europe |
| ... | ... | ... | ... | ... |
| 283 | Zanzibar | OWID_ZAN | 2015.0 | Africa |
| 284 | Zimbabwe | ZWE | 2015.0 | Africa |
| 0 | Faeroe Islands | NaN | NaN | Europe |
| 1 | Micronesia | NaN | NaN | Oceania |
| 2 | Timor | NaN | NaN | Asia |
288 rows × 4 columns
# Only Entity and Continent are needed for the mapping; drop the rest in place.
df2.drop(['Code', 'Year'], axis=1, inplace=True)
df2.head()
| Entity | Continent | |
|---|---|---|
| 0 | Abkhazia | Asia |
| 1 | Afghanistan | Asia |
| 2 | Akrotiri and Dhekelia | Asia |
| 3 | Aland Islands | Europe |
| 4 | Albania | Europe |
# Renumber the lookup rows 0..n-1, discarding the old index labels.
df2.reset_index(drop=True, inplace=True)
# dtype of the country-name column.
df1['Country'].dtype
dtype('O')
# Plain dict {entity name: continent} built from the lookup table.
df2_dic = df2.set_index('Entity')['Continent'].to_dict()
# Fill the Continent column by looking every country name up in that dict;
# names absent from the dict become NaN.
df1['Continent'] = df1['Country'].map(df2_dic)
# One row per country (its first non-null value in each column) as a check.
df1.groupby('Country').first().head(10)
| Year | coTons | population | Area | % of World | Density(km2) | co2ForYear | Continent | |
|---|---|---|---|---|---|---|---|---|
| Country | ||||||||
| Afghanistan | 1750 | 0.0 | 41128771.0 | 652230.0 | 0.40% | 63/km² | 0.0 | Asia |
| Albania | 1750 | 0.0 | 2842321.0 | 28748.0 | 0.00% | 99/km² | 0.0 | Europe |
| Algeria | 1750 | 0.0 | 44903225.0 | 2381741.0 | 1.60% | 19/km² | 0.0 | Africa |
| Andorra | 1750 | 0.0 | 79824.0 | 468.0 | 0.00% | 171/km² | 0.0 | Europe |
| Angola | 1750 | 0.0 | 35588987.0 | 1246700.0 | 0.80% | 29/km² | 0.0 | Africa |
| Anguilla | 1750 | 0.0 | NaN | 91.0 | 0.00% | None | 0.0 | North America |
| Antarctica | 1750 | 0.0 | NaN | NaN | None | None | 0.0 | Antarctica |
| Antigua and Barbuda | 1750 | 0.0 | 93763.0 | 442.0 | 0.00% | 212/km² | 0.0 | North America |
| Argentina | 1750 | 0.0 | 45510318.0 | 2780400.0 | 1.80% | 16/km² | 0.0 | South America |
| Armenia | 1750 | 0.0 | 2780469.0 | 29743.0 | 0.00% | 93/km² | 0.0 | Asia |
def getPlot(df, category, type, plotType, mark):
    """Draw and show a plotly figure for already filtered data.

    df       -> for 'bar': long-format frame with 'Year', `category` and
                `type` columns; for 'line': frame whose index is the x axis
                and whose columns are each drawn as one line
    category -> name of the grouping column ('Country' or 'Continent')
    type     -> name of the value column being plotted
    plotType -> 'bar' or 'line'; any other value draws nothing
    mark     -> passed to plotly as `markers` for line plots

    Returns None; the figure is displayed with fig.show().
    """
    chart_title = type + ' By ' + category
    if plotType == 'bar':
        # A continent spans many countries: add their values up per year so
        # that each (year, continent) pair is a single bar.
        if category == 'Continent':
            df = df.groupby(['Year', category])[type].sum().reset_index()
        fig = px.bar(df, x='Year', y=type, color=category, barmode='group', title=chart_title)
        fig.show()
    elif plotType == 'line':
        fig = px.line(df, x=df.index, y=df.columns[0:], markers=mark, title=chart_title)
        fig.show()
def getPivot(df, category, type):
    """Return a wide table: one row per year, one column per `category` value.

    df       -> long-format frame with 'Year', `category` and `type` columns
    category -> name of the column whose values become the pivot columns
    type     -> name of the value column

    For category 'Continent' the values sharing a year and a continent are
    summed; for any other category pandas' default aggregation (the mean) is
    used.
    """
    aggregation = 'sum' if category == 'Continent' else 'mean'
    return df.pivot_table(index='Year', columns=category, values=type, aggfunc=aggregation)
def getFilter(year, category, range, list, df):
    """Return the rows of `df` for the requested names and years.

    year     -> list of years; with range == 'Yes' the first two entries are
                the inclusive bounds, otherwise the exact years to keep
    category -> name of the column holding the names ('Country' or 'Continent')
    range    -> the string 'Yes' selects a span of years, any other value
                selects only the listed years
    list     -> names to keep; letter case is ignored
    df       -> frame with a 'Year' column and a `category` column
    """
    # Match names without regard to letter case, against the values really
    # present in the column. str.capitalize() would lower-case everything
    # after the first letter, so multi-word names such as 'United States' or
    # 'North America' would never match.
    wanted = {str(item).casefold() for item in list}
    matching_names = [
        value for value in df[category].dropna().unique()
        if str(value).casefold() in wanted
    ]
    name_mask = df[category].isin(matching_names)
    if range == 'Yes':
        # Inclusive span between the first two entries of `year`.
        year_mask = df['Year'].between(year[0], year[1])
    else:
        # Only the years listed explicitly.
        year_mask = df['Year'].isin(year)
    df_filtered = df[name_mask & year_mask]
    return df_filtered
"""
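year -> list of years: the first two are the bounds when range is 'Yes', otherwise the exact years to select
category -> 'Country' or 'Continent': the column used to group the data
range -> the string 'Yes' returns every year between the bounds, any other value returns the listed years only
df -> the dataframe to filter and plot
list -> list of country or continent names to keep
type -> name of the value column to plot (for example 'coTons')
plotType -> 'bar' or 'line'
mark -> True to draw markers when plotType is 'line'
"""
"\n\nyear -> list of years: the first two are the bounds when range is 'Yes', otherwise the exact years to select\ncategory -> 'Country' or 'Continent': the column used to group the data\nrange -> the string 'Yes' returns every year between the bounds, any other value returns the listed years only\ndf -> the dataframe to filter and plot\nlist -> list of country or continent names to keep\ntype -> name of the value column to plot (for example 'coTons')\nplotType -> 'bar' or 'line'\nmark -> True to draw markers when plotType is 'line'\n"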
def renderPlot(year, category, range, list, df, type, plotType, mark):
    """Filter `df`, reshape it if needed, and show the requested plot.

    year     -> list of years, forwarded to getFilter
    category -> grouping column name; its first letter is upper-cased and the
                rest lower-cased before use, so 'country' and 'Country' are
                equivalent
    range    -> forwarded to getFilter ('Yes' selects a span of years)
    list     -> names to keep, forwarded to getFilter
    df       -> source dataframe
    type     -> name of the value column to plot
    plotType -> 'line' plots a pivoted table; anything else is handed to
                getPlot with the filtered rows as they are
    mark     -> forwarded to getPlot

    Returns whatever getPlot returns.
    """
    category = category.capitalize()
    filterData = getFilter(year, category, range, list, df)
    if plotType != 'line':
        return getPlot(filterData, category, type, plotType, mark)
    # Line plots need one column per country/continent, indexed by year.
    pivoted = getPivot(filterData, category, type)
    return getPlot(pivoted, category, type, plotType, mark)
# Line plot of three countries; range is 1 (not the string 'Yes'), so only the
# three listed years are selected.
renderPlot([1995,1998, 2000],'Country', 1, ['France', 'Belgium','Yemen'], df1,'coTons', 'line', True)
# Grouped bars per continent; range is 'Yes', so the first two years are the
# bounds (1995 to 2002) and the third entry is not used.
renderPlot([1995, 2002, 2000],'Continent', 'Yes', ['europe', 'asia'], df1,'coTons', 'bar', True)
# Same selection drawn as lines with markers.
renderPlot([1995, 2002],'Continent', 'Yes', ['europe', 'asia'], df1,'coTons', 'line', True)